# Import required R libraries
library(fpp3)

Exercise 3.1

Consider the GDP information in global_economy. Plot the GDP per capita for each country over time. Which country has the highest GDP per capita? How has this changed over time?

global_economy %>%
  autoplot(GDP/Population, show.legend=FALSE) +
  labs(title= "GDP per capita", y = "$US")

global_economy %>%
  filter(Year == "2017") %>%
  mutate(GdpPerPop = GDP/Population) %>%
  arrange(desc(GdpPerPop))
## # A tsibble: 262 x 10 [1Y]
## # Key:       Country [262]
##    Country Code   Year     GDP Growth   CPI Imports Exports Population GdpPerPop
##    <fct>   <fct> <dbl>   <dbl>  <dbl> <dbl>   <dbl>   <dbl>      <dbl>     <dbl>
##  1 Luxemb… LUX    2017 6.24e10   2.30 111.    194.    230.      599449   104103.
##  2 Macao … MAC    2017 5.04e10   9.10 136.     32.0    79.4     622567    80893.
##  3 Switze… CHE    2017 6.79e11   1.09  98.3    53.9    65.0    8466017    80190.
##  4 Norway  NOR    2017 3.99e11   1.92 115.     33.1    35.5    5282223    75505.
##  5 Iceland ISL    2017 2.39e10   3.64 122.     42.8    47.0     341284    70057.
##  6 Ireland IRL    2017 3.34e11   7.80 105.     87.9   120.     4813608    69331.
##  7 Qatar   QAT    2017 1.67e11   1.58 116.     37.3    51.0    2639211    63249.
##  8 United… USA    2017 1.94e13   2.27 112.     NA      NA    325719178    59532.
##  9 North … NAC    2017 2.10e13   2.35  NA      NA      NA    362492702    58070.
## 10 Singap… SGP    2017 3.24e11   3.62 113.    149.    173.     5612253    57714.
## # … with 252 more rows

Exercise 3.2

For each of the following series, make a graph of the data. If transforming seems appropriate, do so and describe the effect.

A

United States GDP from global_economy

global_economy %>%
  filter(Country == "United States") %>%
  autoplot(GDP/Population) +
  labs(title= "GDP per capita", y = "$US")

Population adjustment

B

Slaughter of Victorian “Bulls, bullocks and steers” in aus_livestock.

aus_livestock %>%
  filter(Animal == "Bulls, bullocks and steers" &
           State == "Victoria") %>%
  mutate(DailyAvgByMonth = Count / days_in_month(Month)) %>%
  autoplot(DailyAvgByMonth) +
  labs(title= "Slaughter of Victorian Bulls, bullocks and steers", y = "Daily Average by Month")

# Help from: https://stackoverflow.com/questions/30037722/daily-average-to-monthly-total-in-r

Apply calendar adjustment with days_in_month from lubridate library.

C

Victorian Electricity Demand from vic_elec.

victs <- vic_elec %>%
  index_by(Date) %>%
  ungroup() %>%
  select(-c(Time)) %>%
  as_tsibble(index=Date, key=Demand)


head(vic_elec)
## # A tsibble: 6 x 5 [30m] <Australia/Melbourne>
##   Time                Demand Temperature Date       Holiday
##   <dttm>               <dbl>       <dbl> <date>     <lgl>  
## 1 2012-01-01 00:00:00  4383.        21.4 2012-01-01 TRUE   
## 2 2012-01-01 00:30:00  4263.        21.0 2012-01-01 TRUE   
## 3 2012-01-01 01:00:00  4049.        20.7 2012-01-01 TRUE   
## 4 2012-01-01 01:30:00  3878.        20.6 2012-01-01 TRUE   
## 5 2012-01-01 02:00:00  4036.        20.4 2012-01-01 TRUE   
## 6 2012-01-01 02:30:00  3866.        20.2 2012-01-01 TRUE
head(victs)
## # A tsibble: 6 x 5 [1D]
## # Key:       Demand [6]
##   Demand Temperature Date       Holiday Time               
##    <dbl>       <dbl> <date>     <lgl>   <dttm>             
## 1  2858.        13.8 2014-03-16 FALSE   2014-03-16 04:30:00
## 2  2870.        13.8 2014-03-16 FALSE   2014-03-16 05:00:00
## 3  2871.        13.8 2014-03-16 FALSE   2014-03-16 04:00:00
## 4  2877.        14.9 2012-12-25 TRUE    2012-12-25 05:30:00
## 5  2903.        14.8 2012-12-25 TRUE    2012-12-25 05:00:00
## 6  2905.        13.1 2013-12-25 TRUE    2013-12-25 05:30:00
#  as_tsibble(index = Date, key = Temperature) %>%
#  autoplot(DailyTotal) +
#  labs(title= "Electricity Demand", y = "Daily Total (in MW)")

group_by(Date) %>% summarise(DailyTotal = sum(Demand)) %>% mutate(Quarter = yearquarter(Quarter)) %>% as_tsibble(index = Date, key = DailyTotal) Calendar adjustment, to track by day instead of per 30-minute intervals

D

Gas production from aus_production.

# From section 3.1
lambda <- aus_production %>%
  features(Gas, features = guerrero) %>%
  pull(lambda_guerrero)
aus_production %>%
  autoplot(box_cox(Gas, lambda)) +
  labs(y = "",
       title = latex2exp::TeX(paste0(
         "Transformed gas production with $\\lambda$ = ",
         round(lambda,2))))

#aus_production %>%
#  autoplot(Gas) +
#  labs(title= "Gas Production", y = "Petajoule")

Box-Cox (mathematical transformation)

Exercise 3.3

Why is a Box-Cox transformation unhelpful for the canadian_gas data?

canadian_gas %>%
  autoplot(Volume) +
  labs(title= "Monthly Canadian Gas Production", y = "Billions of cubic meters")

lambda <- canadian_gas %>%
  features(Volume, features = guerrero) %>%
  pull(lambda_guerrero)
canadian_gas %>%
  autoplot(box_cox(Volume, lambda)) +
  labs(y = "",
       title = latex2exp::TeX(paste0(
         "Transformed gas production with $\\lambda$ = ",
         round(lambda,2))))

The Box-Cox transformations is unhelpful for canadian_gas because the seasonal variation is already about the same across the whole series. As seen above in the initial plot without transformation and the second plot with a Box-Cox transformation, the transformation doesn’t necessarily tease out the season variation. If anything, the transformation diminishes the impact of the large seasonal swings between 1978 through 1990.

Exercise 3.4

What Box-Cox transformation would you select for your retail data (from Exercise 8 in Section 2.10)?

set.seed(8675309)

myseries <- aus_retail %>%
  filter(`Series ID` == sample(aus_retail$`Series ID`,1))

autoplot(myseries, Turnover) +
  labs(title = "Turnover in Queensland Takeaway food services",
       subtitle = "Series ID: A3349767W",
       y = "Turnover")

lambda <- myseries %>%
  features(Turnover, features = guerrero) %>%
  pull(lambda_guerrero)
myseries %>%
  autoplot(box_cox(Turnover, lambda)) +
  labs(y = "",
       title = latex2exp::TeX(paste0(
         "Transformed food services turnover with $\\lambda$ = ",
         round(lambda,2))))

Exercise 3.5

For the following series, find an appropriate Box-Cox transformation in order to stabilize the variance. Tobacco from aus_production, Economy class passengers between Melbourne and Sydney from ansett, and Pedestrian counts at Southern Cross Station from pedestrian.

A - aus_production

# Tobacco
lambda <- aus_production %>%
  features(Tobacco, features = guerrero) %>%
  pull(lambda_guerrero)
aus_production %>%
  autoplot(box_cox(Tobacco, lambda)) +
  labs(y = "",
       title = latex2exp::TeX(paste0(
         "Transformed tobacco production with $\\lambda$ = ",
         round(lambda,2))))

B - ansett

# Economy class passengers between Melbourne and Sydney
lambda <- ansett %>%
  filter(Airports == 'MEL-SYD' &
           Class == 'Economy') %>%
  features(Passengers, features = guerrero) %>%
  pull(lambda_guerrero)

ansett %>%
  filter(Airports == 'MEL-SYD' &
           Class == 'Economy') %>%
  autoplot(box_cox(Passengers, lambda)) +
  labs(y = "",
       title = latex2exp::TeX(paste0(
         "Transformed economy passengers between Mel and Syd with $\\lambda$ = ",
         round(lambda,2))))

C - pedestrian

# Pedestrian counts at Southern Cross Station
lambda <- pedestrian %>%
  filter(Sensor == 'Southern Cross Station') %>%
  features(Count, features = guerrero) %>%
  pull(lambda_guerrero)

pedestrian %>%
  filter(Sensor == 'Southern Cross Station') %>%
  autoplot(box_cox(Count, lambda)) +
  labs(y = "",
       title = latex2exp::TeX(paste0(
         "Transformed pedestrian count at Southern Cross Station with $\\lambda$ = ",
         round(lambda,2))))

Exercise 3.7

Consider the last five years of the Gas data from aus_production.

gas <- tail(aus_production, 5*4) %>% select(Gas)

head(gas)
## # A tsibble: 6 x 2 [1Q]
##     Gas Quarter
##   <dbl>   <qtr>
## 1   221 2005 Q3
## 2   180 2005 Q4
## 3   171 2006 Q1
## 4   224 2006 Q2
## 5   233 2006 Q3
## 6   192 2006 Q4

A

Plot the time series. Can you identify seasonal fluctuations and/or a trend-cycle?

gas %>%
  autoplot(Gas)

Trend-cycle shows an increase over the past five years. And the seasonal variance shows lows in Q1 and highs in Q3.

B

Use classical_decomposition with type=multiplicative to calculate the trend-cycle and seasonal indices.

# From section 3.4
dc <- gas %>%
  model(classical_decomposition(Gas, type = "multiplicative")) %>%
  components() 

dc %>%
  autoplot() +
  labs(title = "Classical multiplicative decomposition of gas production in Australia in petajoules")

C

Do the results support the graphical interpretation from part A?

Yes, the trend line shows an increase from left to right with a plateau in the middle. The seasonal indices shows an almost perfect seasonal variance over the five-year window.

D

Compute and plot the seasonally adjusted data.

dc %>%
  ggplot(aes(x = Quarter)) +
  geom_line(aes(y = Gas, colour = "Data")) +
  geom_line(aes(y = season_adjust,
                colour = "Seasonally Adjusted")) +
  geom_line(aes(y = trend, colour = "Trend")) +
  labs(y = "Petajoules",
       title = "Gas production in Australia") +
  scale_colour_manual(
    values = c("gray", "#0072B2", "#D55E00"),
    breaks = c("Data", "Seasonally Adjusted", "Trend")
  )

E

Change one observation to be an outlier (e.g., add 300 to one observation), and recompute the seasonally adjusted data. What is the effect of the outlier?

# Outlier in beginning

gas_OutFront <- gas
gas_OutFront$Gas[1] <- gas_OutFront$Gas[1] + 300

of_dc <- gas_OutFront %>%
  model(classical_decomposition(Gas, type = "multiplicative")) %>%
  components()

of_dc %>%
  autoplot() +
  labs(title = "Classical multiplicative decomposition of gas production in Australia in petajoules")

of_dc %>%
  ggplot(aes(x = Quarter)) +
  geom_line(aes(y = Gas, colour = "Data")) +
  geom_line(aes(y = season_adjust,
                colour = "Seasonally Adjusted")) +
  geom_line(aes(y = trend, colour = "Trend")) +
  labs(y = "Petajoules",
       title = "Gas production in Australia") +
  scale_colour_manual(
    values = c("gray", "#0072B2", "#D55E00"),
    breaks = c("Data", "Seasonally Adjusted", "Trend")
  )

# Outlier in middle
gas_OutMid <- gas
gas_OutMid$Gas[11] <- gas_OutMid$Gas[11] + 300

om_dc <- gas_OutMid %>%
  model(classical_decomposition(Gas, type = "multiplicative")) %>%
  components()

om_dc %>%
  autoplot() +
  labs(title = "Classical multiplicative decomposition of gas production in Australia in petajoules")

om_dc %>%
  ggplot(aes(x = Quarter)) +
  geom_line(aes(y = Gas, colour = "Data")) +
  geom_line(aes(y = season_adjust,
                colour = "Seasonally Adjusted")) +
  geom_line(aes(y = trend, colour = "Trend")) +
  labs(y = "Petajoules",
       title = "Gas production in Australia") +
  scale_colour_manual(
    values = c("gray", "#0072B2", "#D55E00"),
    breaks = c("Data", "Seasonally Adjusted", "Trend")
  )

# Outlier in back
gas_OutBack <- gas
gas_OutBack$Gas[20] <- gas_OutBack$Gas[20] + 300

ob_dc <- gas_OutBack %>%
  model(classical_decomposition(Gas, type = "multiplicative")) %>%
  components()

ob_dc %>%
  autoplot() +
  labs(title = "Classical multiplicative decomposition of gas production in Australia in petajoules")

ob_dc %>%
  ggplot(aes(x = Quarter)) +
  geom_line(aes(y = Gas, colour = "Data")) +
  geom_line(aes(y = season_adjust,
                colour = "Seasonally Adjusted")) +
  geom_line(aes(y = trend, colour = "Trend")) +
  labs(y = "Petajoules",
       title = "Gas production in Australia") +
  scale_colour_manual(
    values = c("gray", "#0072B2", "#D55E00"),
    breaks = c("Data", "Seasonally Adjusted", "Trend")
  )

F

Does it make any difference if the outlier is near the end rather than in the middle of the time series?

Exercise 3.8

Recall your retail time series data (from Exercise 8 in Section 2.10). Decompose the series using X-11. Does it reveal any outliers, or unusual features that you had not noticed previously?

library(seasonal)
set.seed(8675309)

myseries <- aus_retail %>%
  filter(`Series ID` == sample(aus_retail$`Series ID`,1))

x11_dcmp <- myseries %>%
  model(x11 = X_13ARIMA_SEATS(Turnover ~ x11())) %>%
  components()

autoplot(x11_dcmp) +
  labs(title =
    "Decomposition of Turnover in Queensland Takeaway food services using X-11.")

Yes, the seasonal variance flips over time, and there are a few outliers as identified from the “irregular” chart.

Exercise 3.9

Figures 3.19 and 3.20 show the result of decomposing the number of persons in the civilian labour force in Australia each month from February 1978 to August 1995.

A

Write about 3–5 sentences describing the results of the decomposition. Pay particular attention to the scales of the graphs in making your interpretation.

B

Is the recession of 1991/1992 visible in the estimated components?